#delimit ;

/* Session setup. Commands in this script are terminated by a semicolon. */;
clear;

/* Memory request for older Stata releases (Stata 12 and later manage
   memory automatically and ignore this setting). */;
set memory 3000m;

/* Locations used by the script. The input and output paths are concatenated
   directly with file names below, so each must end with a directory
   separator. MY_TEMP_PATH is defined here but not referenced in the
   visible part of the script. */;
global MY_IN_PATH   "[path where the GSOEP data is stored]";
global MY_OUT_PATH  "[path where your master data set will be stored]";
global MY_TEMP_PATH "[path where your temporary files are stored]";

/* Full name of the master data set that this script writes. */;
global MY_OUT_FILE "${MY_OUT_PATH}NEW_CNEF2.dta";


/* --------------------------------------------------- */;
/* ----------------[ Merge datasets ]----------------- */;
/* --------------------------------------------------- */;


/* ----------------[ start with ppfadl.dta ]---------- */; 

#delimit ;

/* Read only the required columns of ppfadl.dta. This file is the master
   data set onto which all later files are merged. */;
use cid hid pid syear sex psample prgroup gebjahr loc1989 nett1
    using "${MY_IN_PATH}ppfadl.dta";

/* Household-year identifier: (survey year - 1900) in the leading digits and
   hid in the trailing six digits.
   NOTE(review): this is unique per household-year only while hid is below
   1000000 - confirm against the hid range of the data release in use. */;
generate long yearhhnr = 1000000*(syear - 1900) + hid;

sort pid;

/* Number of annual household observations: collapse to one row per
   yearhhnr and read the count from the Obs column of the summary. The
   collapsed mean of pid itself is not used. */;
preserve;
collapse (mean) pid, by(yearhhnr);
summarize pid;
restore;

/* Intermediate save of the master data set (overwritten at the end). */;
save "${MY_OUT_FILE}", replace;



/* ----------------[ merge pequiv.dta ]--------------- */; 

#delimit ;

/* Attach the selected pequiv.dta variables to each person-year row.
   merge 1:1 requires pid and syear to identify rows uniquely in both files
   and aborts otherwise. keep(master match) discards rows that exist only in
   pequiv.dta while merging, and nogenerate suppresses the _merge variable,
   so no clean-up of using-only rows is needed afterwards. */;
merge 1:1 pid syear
    using "${MY_IN_PATH}pequiv.dta",
    keep(master match) nogenerate
    keepusing(w11101 w11102 w11105 y11101 l11101 i11101 i11102 i11103 i11104 i11105 i11106 i11110 i11201 i11202 i11203 i11204
    i11206 i11210 ijob1 ijob2 iself imilt i13ly i14ly ixmas iholy igray iothy divdy d11104 d11105
    d11106 d11107 d11109 e11101 e11102 e11103 i11107 i11108 i11109 i11117 h11101 h11102 h11110 renty opery);

sort pid;


/* --------------[ merge hbrutto.dta ]---------------- */; 

#delimit ;

/* Attach bula from hbrutto.dta to every person of the household-year.
   merge m:1 requires hid and syear to identify rows uniquely in hbrutto.dta.
   keep(master match) discards rows that exist only in hbrutto.dta while
   merging, and nogenerate suppresses the _merge variable. */;
merge m:1 hid syear
    using "${MY_IN_PATH}hbrutto.dta",
    keep(master match) nogenerate
    keepusing(bula);

sort pid;


/* -----------------[ merge hl.dta ]------------------ */; 

#delimit ;

/* Attach the selected hl.dta variables to every person of the
   household-year. merge m:1 requires hid and syear to identify rows uniquely
   in hl.dta. keep(master match) discards rows that exist only in hl.dta
   while merging, and nogenerate suppresses the _merge variable. */;
merge m:1 hid syear
    using "${MY_IN_PATH}hl.dta",
    keep(master match) nogenerate
    keepusing(hlc0005 hlf0088 hlf0087 hlc0112 hlf0007 hlf0095 hlf0096 hlc0113 hlc0114);

sort pid;

/* -----------------[ merge hgen.dta ]---------------- */; 

#delimit ;

/* Attach hgowner and hgmoveyr from hgen.dta to every person of the
   household-year. merge m:1 requires hid and syear to identify rows uniquely
   in hgen.dta. keep(master match) discards rows that exist only in hgen.dta
   while merging, and nogenerate suppresses the _merge variable. */;
merge m:1 hid syear
    using "${MY_IN_PATH}hgen.dta",
    keep(master match) nogenerate
    keepusing(hgowner hgmoveyr);

sort pid;

/* ---------------[ merge pgen.dta ]------------------ */; 

#delimit ;

/* Attach the selected pgen.dta variables to each person-year row.
   merge 1:1 requires pid and syear to identify rows uniquely in both files
   and aborts otherwise. keep(master match) discards rows that exist only in
   pgen.dta while merging, and nogenerate suppresses the _merge variable. */;
merge 1:1 pid syear
    using "${MY_IN_PATH}pgen.dta",
    keep(master match) nogenerate
    keepusing(pgsbil pgsbila pgsbilo pgbbila pgbbil01 pgbbil02 pgbbil03 pgbbilo pglfs pgtatzt);

sort pid;


/* -----------------[ merge pl.dta ]------------------ */; 

#delimit ;

/* Attach the selected pl.dta variables to each person-year row.
   merge 1:1 requires pid and syear to identify rows uniquely in both files
   and aborts otherwise. keep(master match) discards rows that exist only in
   pl.dta while merging, and nogenerate suppresses the _merge variable. */;
merge 1:1 pid syear
    using "${MY_IN_PATH}pl.dta",
    keep(master match) nogenerate
    keepusing(plb0021 plb0022 plb0057 plb0423);

sort pid;


/* --------------------------------------------------- */;
/* ---------------[ rename variables ]---------------- */;
/* --------------------------------------------------- */;

/* Identifiers and survey year. */;
rename pid   persnr;
rename hid   hhnr;
rename syear year;

/* Variables merged from hl.dta. */;
rename hlc0005 hhnettoincmonth;
rename hlf0007 newowner;
rename hlf0087 paymortgage;
rename hlf0088 mortgageown;
rename hlf0095 imputedrent;
rename hlf0096 imputedrentdk;
rename hlc0112 mortgageother;
rename hlc0113 creditdebtyn;
rename hlc0114 creditdebtamount;

/* Variables merged from hgen.dta. */;
rename hgowner  eigen;
rename hgmoveyr einzug;

/* Variables merged from pgen.dta. */;
rename pglfs    lfs;
rename pgtatzt  tatzeit;
rename pgsbil   psbil;
rename pgsbila  psbila;
rename pgsbilo  psbilo;
rename pgbbila  pbbila;
rename pgbbil01 pbbil01;
rename pgbbil02 pbbil02;
rename pgbbil03 pbbil03;
rename pgbbilo  pbbilo;

/* Variables merged from pl.dta. */;
rename plb0021 unemp;
rename plb0022 empstat;
rename plb0057 selfemp;
rename plb0423 empstateast;


/* --------------------------------------------------- */;
/* -----------------[ save dataset ]------------------ */;
/* --------------------------------------------------- */;


/* -----------[ balanced / unbalanced design ]-------- */; 

/* Remove person-year rows whose nett1 code is -2 or -3. */;
drop if inlist(nett1, -2, -3);

/* help2 = number of rows per person with nett1 equal to 1. Rows with any
   other (or missing) nett1 contribute zero. */;
bysort persnr: egen help2 = total(nett1 == 1);
tab help2;
* delete individuals with 100% imputed income;
drop if help2 == 0;
* keep if help2~=0 | (help2==0 & i11210==1);
drop help2;

/* Stata orders tied observations randomly when sorting, so sorting on
   persnr alone leaves the row order within a person different from run to
   run. Sort on the full person-year key to make the saved file
   reproducible. */;
sort persnr year;

save "${MY_OUT_FILE}", replace;





